1 Data input, cleaning and pre-processing

Load protein abundance data, pre-process them into a format suitable for network analysis, and clean the data by removing obvious outlier samples as well as proteins and samples with excessive numbers of missing entries.

1.a Loading expression data

The expression data is contained in the file:

  • 1-9_A1_A2_A3__precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_A1vsA2vsA3.xlsx that comes with this tutorial.
  • 10-14_C1_C2_C3__precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_C1vsC2vsC3.xlsx
  • 1-14_A1_A2_A3_C1_C2_C3_precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_AvsC_abundances_indiv: Contains protein abundance in both groups.

Files grouped by samples, not share same proteins. This is one important point to discuss. One aporximation is consider missing protein with 0 abundance. Also it can be considered only the shared proetins.

Table with phenotype is:

datatable_jm<-function(x,column=NULL){
  library(DT)
datatable(
x,
extensions = 'Buttons',
filter = list(position = 'top', clear = T),
options = list(dom = 'Blfrtip',
scrollX = TRUE,
scrollY = T,
scrollCollapse = TRUE,
# buttons = list(list(extend = 'colvis')),
buttons = list(list(extend = 'colvis'),'copy', 'print',
list(extend = 'collection',
buttons = c('csv', 'excel', 'pdf'),
text = 'Download')),
columnDefs = list(list(visible=FALSE, targets=column))))
  }
library(readxl)

# Load the WGCNA package
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
# Read in the female liver data set
pheno <- read.csv("20240118-data(1).csv")
pheno$Group_num <- pheno$Group
pheno$Group_num <- gsub("NaCl", 1, pheno$Group_num)
pheno$Group_num <- gsub("PRP", 0, pheno$Group_num)
pheno$Group_num<-as.numeric(pheno$Group_num)

datatable_jm(pheno)

Variable of interest Group. To performe WGCNA analysis needs to be in numeric format. Moduls with posstitive correaltion means that are associated with Group_num= 1, NaCl

# load("RESULTATS/OBJECTES/data_abundance")
data_abundance_raw <- read_xlsx("./Gener/1-14_A1_A2_A3_C1_C2_C3_precol2cm_col75cm_top15_70000_3e6_50_35000_5e4_100_iw4_excl40_2h_200nlmin_1ul_AvsC_abundances_indiv(1).xlsx")
dim(data_abundance_raw)
## [1] 1183  114
#dim(data_abundance_raw)
data_abundance_raw_1 <- (data_abundance_raw)[grep("Abundances [(]Normalized)", colnames(data_abundance_raw))]
#dim(data_abundance_raw_1)
# data_abundance_raw_1 Aquesta matriu la fare servir per el WGCNA
data_abundance_raw_1 <- data.frame(data_abundance_raw_1)
rownames(data_abundance_raw_1) <- data_abundance_raw$Accession

# Arreglar els noms de les mostres

colnames(data_abundance_raw_1) <- gsub("Abundances..Normalized...", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..1..1..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..1..2..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..2..1..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..2..2..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..3..1..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("..precipitation..EVs..n.a..3..2..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub(".Sample..", "", colnames(data_abundance_raw_1))
colnames(data_abundance_raw_1) <- gsub("[.]", "_", colnames(data_abundance_raw_1))
#dim(data_abundance_raw_1)
data_abundance_raw_1 <- data_abundance_raw_1[, -grep("F65", colnames(data_abundance_raw_1))]
#dim(data_abundance_raw_1)
data_abundance_raw_1 <- data_abundance_raw_1[, -grep("F66", colnames(data_abundance_raw_1))]
#dim(data_abundance_raw_1)
data_abundance_raw_1 <- data_abundance_raw_1[, -grep("F75", colnames(data_abundance_raw_1))]
#dim(data_abundance_raw_1)

data_abundance_raw_1 <- data_abundance_raw_1[, -grep("76", colnames(data_abundance_raw_1))]

#dim(data_abundance_raw_1)

pheno <- data.frame(name = colnames(data_abundance_raw_1))
pheno$Sample <- NA
pheno$Group <- NA
pheno <- read.csv("20240118-data(1).csv")

pheno <- pheno[-grep("F65", pheno$name), ]
pheno <- pheno[-grep("F66", pheno$name), ]
pheno <- pheno[-grep("F75", pheno$name), ]
pheno <- pheno[-grep("F76", pheno$name), ]
pheno$Sample_ID <- unlist(lapply(pheno$name, function(x) strsplit(x, "_")[[1]][1]))
#dim(pheno)
library(reshape2)
data_abundance_raw_2<-data_abundance_raw_1
data_abundance_raw_2$protein<-rownames(data_abundance_raw_2)  
as<-melt(data_abundance_raw_2)
library(dplyr)
as<-merge(as,pheno,by.x="variable",by.y="name",all.x = T)
as<-
as %>%group_by(protein,Sample) %>% 
  mutate(value_mean=mean(value))


as<-as %>% distinct(Sample,protein,.keep_all = T)

as<-as[,colnames(as)%in%c("variable","Sample","protein","value_mean")]

data_abundance_raw_3 <- dcast(as,protein   ~ variable  , value.var="value_mean")

rownames(data_abundance_raw_3)<-data_abundance_raw_3$protein
data_abundance_raw_3<-data_abundance_raw_3[,-1]

# colnames(data_abundance_raw_3)<-unique(as$Sample)
# Hi ha NA. Substituir per 0.01
#table(is.na(data_abundance_raw_3))
data_abundance_raw_3[is.na(data_abundance_raw_3)] <- 0.01

datExpr0 <- as.data.frame(data_abundance_raw_3)


rownames(datExpr0) <- rownames(datExpr0)

# datExpr0<-datExpr0[,-1]
datExpr0 <- t(datExpr0)
#dim(datExpr0)
rownames(datExpr0)<-unique(as$Sample)

The expression data set contains 6 samples. Note that each row corresponds to a protein and column to a sample.

1.b Checking data for excessive missing values and identification of outlier microarray samples

We first check for proteins and samples with too many missing values:

gsg <- goodSamplesGenes(datExpr0, verbose = 3)
##  Flagging genes and samples with too many missing values...
##   ..step 1
##   ..Excluding 21 genes from the calculation due to too many missing samples or zero variance.
##   ..step 2
gsg$allOK
## [1] FALSE
if (!gsg$allOK) {
  # Optionally, print the gene and sample names that were removed:
  if (sum(!gsg$goodGenes) > 0) {
    printFlush(paste("Removing genes:", paste(colnames(datExpr0)[!gsg$goodGenes], collapse = ", ")))
  }
  if (sum(!gsg$goodSamples) > 0) {
    printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")))
  }
  # Remove the offending genes and samples from the data:
  datExpr0 <- datExpr0[gsg$goodSamples, gsg$goodGenes]
}
## Removing genes: O15173, P06493, P08670, P0CG38, P0CG39, P0DPH8
## P0DPH7, P11217, P12036, P50993, P62745, P68133, Q12840, Q5T0T0, Q5VST9, Q6A162, Q6ZU15, Q7Z4I7, Q8NF91, Q9NYU2, Q9P2T1, Q9Y4G6

Next we cluster the samples (in contrast to clustering proteins that will come later) to see if there are any obvious outliers.

dev.off()
## null device 
##           1
#dim(datExpr0)

sampleTree <- hclust(dist(datExpr0), method = "average")
par(cex = 0.6)
par(mar = c(0, 4, 2, 0))

plot(sampleTree,
  main = "Sample clustering to detect outliers", sub = "", xlab = "", cex.lab = 1.5,
  cex.axis = 1.5, cex.main = 2
)

Les mostres A2 (F65 i F66 ) i la C1 (F75 i F76) ja estan eliminades.

1.c Loading clinical trait data

# Create datTraits
allTraits <- data.frame(pheno)

nameSamples <- rownames(datExpr0)
datTraits <- data.frame(name = nameSamples)
# PL-EV1 -> llista de plaquetes ->NaCL

datTraits$nom_uni <- datTraits$name

# datTraits$nom_uni[grep("Lisat.Plaquetes",datTraits$name)]<-"NaCl"

traitRows <- match(nameSamples, allTraits$Sample)
datTraits <- allTraits[traitRows, ]
rownames(datTraits) <- allTraits[traitRows, 1]
collectGarbage()

We now have the expression data in the variable datExpr, and the corresponding clinical traits in the variable datTraits.

Sample dendrogram

Before we continue with network construction and module detection, we visualize how the clinical traits relate to the sample dendrogram

# Re-cluster samples
sampleTree2 <- hclust(dist(datExpr0), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry

datTraits$Group_num <- datTraits$Group
datTraits$Group_num <- gsub("NaCl", 1, datTraits$Group_num)
datTraits$Group_num <- gsub("PRP", 0, datTraits$Group_num)
traitColors <- numbers2colors(as.numeric(datTraits$Group_num), signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
  groupLabels = names(datTraits),
  main = "Sample dendrogram and trait heatmap"
)

Batch effect?

Groups are very different (between other things, are not share proteins).

library(sva)


save(datExpr0,file = "./matriu_unificada")
datExpr0_norm <-
  ComBat(
    t(datExpr0),
    batch = c(rep("ap", 3), rep("PL", (3))),
    mod = NULL,
    par.prior = TRUE,
    prior.plots = FALSE,
    mean.only = FALSE,
    ref.batch = NULL,
    BPPARAM = bpparam("SerialParam")
  )
datExpr0_norm <- t(datExpr0_norm)

Sample dendrogram

Before we continue with network construction and module detection, we visualize how the clinical traits relate to the sample dendrogram

# Re-cluster samples

sampleTree2_norm <- hclust(dist(datExpr0_norm), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry

datTraits$Group_num <- datTraits$Group
datTraits$Group_num <- gsub("NaCl", 1, datTraits$Group_num)
datTraits$Group_num <- gsub("PRP", 0, datTraits$Group_num)
traitColors <- numbers2colors(as.numeric(datTraits$Group_num), signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2_norm, traitColors,
  groupLabels = names(datTraits),
  main = "Sample dendrogram and trait heatmap"
)

2.1 Automatic network construction and module detection

2.a.1 Choosing the soft-thresholding power: analysis of network topology

Constructing a weighted gene network entails the choice of the soft thresholding power β to which co-expression similarity is raised to calculate adjacency [1]. The authors of [1] have proposed to choose the soft thresholding power based on the criterion of approximate scale-free topology. We refer the reader to that work for more details; here we illustrate the use of the function pickSoftThreshold that performs the analysis of network topology and aids the user in choosing a proper soft-thresholding power.

# Choose a set of soft-thresholding powers
powers = c(c(1:50), seq(from = 52, to=100, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr0, 
                         networkType = "unsigned",
                        powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 1162.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 1162 of 1162
##    Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.7630  3.4400        0.94300  539.00    553.00  699.0
## 2      2   0.6180  1.3600        0.72500  343.00    342.00  513.0
## 3      3   0.3530  0.4860        0.30600  251.00    236.00  421.0
## 4      4   0.0270  0.0882       -0.19000  199.00    177.00  367.0
## 5      5   0.0435 -0.1130       -0.16900  165.00    139.00  331.0
## 6      6   0.1950 -0.2470        0.00691  141.00    114.00  304.0
## 7      7   0.3070 -0.3520        0.11800  124.00     98.20  283.0
## 8      8   0.3950 -0.4290        0.22800  111.00     88.20  266.0
## 9      9   0.4440 -0.4840        0.28600   99.90     78.60  251.0
## 10    10   0.5070 -0.5500        0.36600   91.10     70.20  239.0
## 11    11   0.5590 -0.5860        0.43300   83.80     63.00  228.0
## 12    12   0.6120 -0.6180        0.50100   77.60     57.00  219.0
## 13    13   0.6230 -0.6480        0.51700   72.30     52.10  210.0
## 14    14   0.6420 -0.6800        0.54400   67.60     47.70  202.0
## 15    15   0.6600 -0.7060        0.57200   63.50     44.20  195.0
## 16    16   0.7030 -0.7270        0.62800   59.90     41.00  189.0
## 17    17   0.7430 -0.7530        0.68100   56.60     38.00  183.0
## 18    18   0.7650 -0.7750        0.70900   53.70     35.50  178.0
## 19    19   0.7740 -0.7870        0.72100   51.10     33.10  173.0
## 20    20   0.7910 -0.8100        0.74700   48.70     30.90  168.0
## 21    21   0.7950 -0.8200        0.75600   46.50     28.80  164.0
## 22    22   0.7970 -0.8280        0.75900   44.50     27.20  160.0
## 23    23   0.8060 -0.8410        0.76700   42.60     25.70  156.0
## 24    24   0.8110 -0.8540        0.76700   40.90     24.20  152.0
## 25    25   0.8280 -0.8590        0.79000   39.30     23.10  149.0
## 26    26   0.8340 -0.8640        0.79700   37.90     22.00  146.0
## 27    27   0.8400 -0.8720        0.80400   36.50     21.10  142.0
## 28    28   0.8440 -0.8730        0.81000   35.20     20.30  140.0
## 29    29   0.8490 -0.8840        0.81900   34.00     19.40  137.0
## 30    30   0.8590 -0.8890        0.83100   32.90     18.60  134.0
## 31    31   0.8650 -0.8960        0.83900   31.80     18.00  131.0
## 32    32   0.8670 -0.9020        0.84200   30.80     17.20  129.0
## 33    33   0.8700 -0.9120        0.84800   29.90     16.50  126.0
## 34    34   0.8710 -0.9200        0.84800   29.00     15.90  124.0
## 35    35   0.8800 -0.9310        0.85800   28.20     15.40  122.0
## 36    36   0.8900 -0.9350        0.87000   27.40     14.90  120.0
## 37    37   0.8960 -0.9390        0.87700   26.60     14.40  118.0
## 38    38   0.9010 -0.9490        0.88300   25.90     14.00  116.0
## 39    39   0.9070 -0.9550        0.89300   25.20     13.50  114.0
## 40    40   0.9190 -0.9550        0.90700   24.60     13.00  112.0
## 41    41   0.9150 -0.9610        0.90200   23.90     12.60  110.0
## 42    42   0.9200 -0.9640        0.90700   23.40     12.20  109.0
## 43    43   0.9250 -0.9710        0.91500   22.80     11.80  107.0
## 44    44   0.9300 -0.9750        0.92000   22.20     11.50  105.0
## 45    45   0.9340 -0.9780        0.92500   21.70     11.20  104.0
## 46    46   0.9330 -0.9830        0.92400   21.20     10.80  102.0
## 47    47   0.9270 -0.9930        0.91700   20.70     10.50  101.0
## 48    48   0.9250 -0.9950        0.91400   20.30     10.20   99.3
## 49    49   0.9330 -1.0000        0.92400   19.80      9.91   98.0
## 50    50   0.9360 -1.0100        0.92900   19.40      9.61   96.6
## 51    52   0.9440 -1.0100        0.93800   18.60      9.20   94.0
## 52    54   0.9460 -1.0200        0.94000   17.90      8.73   91.6
## 53    56   0.9470 -1.0200        0.94000   17.20      8.33   89.3
## 54    58   0.9490 -1.0300        0.94300   16.50      7.95   87.1
## 55    60   0.9440 -1.0400        0.93700   15.90      7.60   85.0
## 56    62   0.9420 -1.0500        0.93400   15.40      7.32   83.0
## 57    64   0.9440 -1.0500        0.93500   14.80      7.01   81.0
## 58    66   0.9420 -1.0600        0.93100   14.30      6.70   79.2
## 59    68   0.9420 -1.0700        0.93000   13.90      6.39   77.5
## 60    70   0.9520 -1.0700        0.94200   13.40      6.08   75.8
## 61    72   0.9430 -1.0700        0.92900   13.00      5.84   74.3
## 62    74   0.9480 -1.0800        0.93600   12.60      5.61   72.8
## 63    76   0.9420 -1.0800        0.92700   12.20      5.40   71.4
## 64    78   0.9420 -1.0900        0.92600   11.90      5.22   70.1
## 65    80   0.9440 -1.0900        0.93000   11.50      5.01   68.8
## 66    82   0.9550 -1.1000        0.94300   11.20      4.83   67.6
## 67    84   0.9510 -1.1000        0.93700   10.90      4.62   66.4
## 68    86   0.9340 -1.1100        0.91600   10.60      4.45   65.2
## 69    88   0.9290 -1.1200        0.90900   10.30      4.29   64.1
## 70    90   0.9210 -1.1200        0.89800   10.10      4.15   63.1
## 71    92   0.9320 -1.1100        0.91200    9.82      4.02   62.0
## 72    94   0.9260 -1.1200        0.90600    9.58      3.88   61.0
## 73    96   0.9160 -1.1200        0.89200    9.35      3.76   60.1
## 74    98   0.9100 -1.1300        0.88500    9.12      3.64   59.1
## 75   100   0.9180 -1.1300        0.89500    8.91      3.52   58.2
# Plot the results:
sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")

# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

2.a.2 One-step network construction and module detection

Constructing the gene network and identifying modules is now a simple function call:

pw<-12
net = blockwiseModules(datExpr0, 
                       power = pw,
                       networkType="unsigned",
                       # TOMType = "unsigned",
                       minModuleSize = 30,
                       reassignThreshold = 0,
                       mergeCutHeight = 0.25,
                       numericLabels = TRUE, 
                        # pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = "femaleMouseTOM",
verbose = 3)
##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will not use multithreading.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 1 into file femaleMouseTOM-block.1.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 1 genes from module 1 because their KME is too low.
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.25
##        Calculating new MEs...
table(net$colors)
## 
##   0   1   2   3   4   5   6   7   8   9 
##   3 378 203 160 119  93  64  52  48  42

We have chosen the soft thresholding power 12 , a relatively large minimum module size of 30, and a medium sensitivity (deepSplit=2) to cluster splitting. The parameter mergeCutHeight is the threshold for merging of modules. We have also instructed the function to return numeric, rather than color, labels for modules, and to save the Topological Overlap Matrix. The output of the function may seem somewhat cryptic, but it is easy to use. For example, net\(colors contains the module assignment, and net\)MEs contains the module eigengenes of the modules.

The dendrogram can be displayed together with the color assignment using the following code:

# open a graphics window
sizeGrWindow(12, 9)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)

We note that if the user would like to change some of the tree cut, module membership, and module merging criteria, the package provides the function recutBlockwiseTrees that can apply modified criteria without having to recompute the network and the clustering dendrogram. This may save a sub-stantial amount of time. We now save the module assignment and module eigengene information necessary for subsequent analysis.

moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
file = "FemaleLiver-02-networkConstruction-auto.RData")

3 Relating modules to external clinical trait

3a Quantifying module–trait associations

In this analysis we would like to identify modules that are significantly associated with the measured clinical traits. Since we already have a summary profile (eigengene) for each module, we simply correlate eigengenes with external traits and look for the most significant associations

# Define numbers of genes and samples
nGenes = ncol(datExpr0)
nSamples = nrow(datExpr0)
# Recalculate MEs with color labels
MEs0 = moduleEigengenes(datExpr0, moduleColors)$eigengenes
MEs = orderMEs(MEs0)

moduleTraitCor = cor(MEs, as.numeric(datTraits$Group_num), use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)

Since we have a moderately large number of modules and traits, a suitable graphical representation will help in reading the table. We color code each association by the correlation value:

# Will display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits)[4],
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))

The analysis identifies the 1 significant module–trait associations. We will concentrate on GRUP as the trait of interest.

3.b Gene relationship to trait and important modules: Gene Significance and Module Membership

We quantify associations of individual genes with our trait of interest by defining Gene Significance GS as (the absolute value of) the correlation between the gene and the trait. For each module, we also define a quantitative measure of module membership MM as the correlation of the module eigengene and the gene expression profile. This allows us to quantify the similarity of all proteins on the array to every module

# Define variable weight containing the weight column of datTrait
grup = as.data.frame(as.numeric(datTraits$Group_num));
names(grup) = "grup"
# names (colors) of the modules
modNames = substring(names(MEs), 3)
geneModuleMembership = as.data.frame(cor(datExpr0, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));


names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")
geneTraitSignificance = as.data.frame(cor(datExpr0, grup, use = "p"))
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))
names(geneTraitSignificance) = paste("GS.", names(grup), sep="")
names(GSPvalue) = paste("p.GS.", names(grup), sep="")

##3.c Intramodular analysis: identifying genes with high GS and MM

Using the GS and MM measures, we can identify genes that have a high significance for weight as well as high module membership in interesting modules. As an example, we look at the brown module that has the highest association with weight. We plot a scatterplot of Gene Significance vs. Module Membership in the significant modules

moduls_int<-gsub("ME","",rownames(moduleTraitPvalue)[order(moduleTraitPvalue,decreasing = F)])
module = moduls_int[1]
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7)
par(mfrow = c(1,1))
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)

module = moduls_int[2]
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7)
par(mfrow = c(1,1))
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significance\n"),
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)

GS and MM are correlated, illustrating that proteins highly significantly associated with a trait are often also the most important (central) elements of modules associated with the trait. The reader is encouraged to try this code with other significance trait/module correlation.

3.d Summary output of network analysis results

We have found modules with high association with our trait of interest, and have identified their central players by the Module Membership measure. We now merge this statistical information with protein annotation.

# colnames(datExpr0)
# colnames(datExpr0)[moduleColors=="green"]
library(clusterProfiler) 

library(dplyr)
library(DT)

ORA enrichment

gene<-colnames(datExpr0)[moduleColors==moduls_int[1]]
gene<-unlist(strsplit(gene,"\r\n"))

kegg<-enrichKEGG(gene,
                 organism = "hsa",
                 keyType = "uniprot",
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH",
                 qvalueCutoff = 0.2,
                 use_internal_data = FALSE)

assign(paste0("kegg_",moduls_int[1]),kegg)


library(org.Hs.eg.db)
go <- enrichGO(gene          = gene,
               
                OrgDb         = org.Hs.eg.db,
                ont           = "BP",keyType = "UNIPROT",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05,
        readable      = TRUE)

assign(paste0("go_",moduls_int[1]),go)







gene<-colnames(datExpr0)[moduleColors==moduls_int[2]]
gene<-unlist(strsplit(gene,"\r\n"))

kegg<-enrichKEGG(gene,
                 organism = "hsa",
                 keyType = "uniprot",
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH",
                 qvalueCutoff = 0.2,
                 use_internal_data = FALSE)

assign(paste0("kegg_",moduls_int[2]),kegg)



go <- enrichGO(gene          = gene,
               
                OrgDb         = org.Hs.eg.db,
                ont           = "BP",keyType = "UNIPROT",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05,
        readable      = TRUE)

assign(paste0("go_",moduls_int[2]),go)
cat("## KEGG")
## ## KEGG
datatable(get(paste0("kegg_",moduls_int[1]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[1],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))
cat("## GO")
## ## GO
datatable(get(paste0("go_",moduls_int[1]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[1],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))
cat("## KEGG")
## ## KEGG
datatable(get(paste0("kegg_",moduls_int[2]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[2],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))
cat("## GO")
## ## GO
datatable(get(paste0("go_",moduls_int[2]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[2],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))

ORA enrichment WITH UNIVERSE

universe<-colnames(datExpr0)
gene<-colnames(datExpr0)[moduleColors==moduls_int[1]]
gene<-unlist(strsplit(gene,"\r\n"))

kegg<-enrichKEGG(gene,
                 universe = universe,
                 organism = "hsa",
                 keyType = "uniprot",
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH",
                 qvalueCutoff = 0.2,
                 use_internal_data = FALSE)

assign(paste0("kegg_",moduls_int[1]),kegg)


library(org.Hs.eg.db)
go <- enrichGO(gene          = gene,
               universe = universe,
               
                OrgDb         = org.Hs.eg.db,
                ont           = "BP",keyType = "UNIPROT",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05,
        readable      = TRUE)

assign(paste0("go_",moduls_int[1]),go)







gene<-colnames(datExpr0)[moduleColors==moduls_int[2]]
gene<-unlist(strsplit(gene,"\r\n"))

kegg<-enrichKEGG(gene,
                 organism = "hsa",
                 universe = universe,
                 keyType = "uniprot",
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH",
                 qvalueCutoff = 0.2,
                 use_internal_data = FALSE)

assign(paste0("kegg_",moduls_int[2]),kegg)



go <- enrichGO(gene          = gene,
               universe = universe,
                OrgDb         = org.Hs.eg.db,
                ont           = "BP",keyType = "UNIPROT",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05,
        readable      = TRUE)

assign(paste0("go_",moduls_int[2]),go)
cat("## KEGG")
## ## KEGG
datatable(get(paste0("kegg_",moduls_int[1]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[1],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))
cat("## GO")
## ## GO
datatable(get(paste0("go_",moduls_int[1]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[1],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))
cat("## KEGG")
## ## KEGG
datatable(get(paste0("kegg_",moduls_int[2]))@result %>% filter(p.adjust<=0.05),caption = moduls_int[2],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))
cat("## GO")
## ## GO
datatable(get(paste0("go_",moduls_int[2]))@result%>% filter(p.adjust<=0.05),caption = moduls_int[2],
          filter = "top",
          extensions = c('Buttons','FixedColumns'),
          options = list(
            dom = 'Blfrtip',
            scrollX = TRUE,
            scrollCollapse = TRUE,
            buttons = c('copy', 'csv', 'excel'),
            pageLength=5,
            lengthMenu=list(c(5,10,20,-1),c(5,10,20,"tot"))))

5 Visualization of networks within R

5.a Visualizing the gene network

One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create such network plots; Fig. 1 was created using the following code. This code can be executed only if the network was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If the networks were calculated using the block-wise approach, the user will need to modify this code to perform the visualization in each block separately. The modification is simple and we leave it as an exercise for the interested reader.

# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.
dissTOM = 1-TOMsimilarityFromExpr(datExpr0, power = 9);
## TOM calculation: adjacency..
## ..will not use multithreading.
##  Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
plotTOM = dissTOM^7;
# Set diagonal to NA for a nicer plot
diag(plotTOM) = NA;
# Call the plot function
sizeGrWindow(9,9)
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

5.bVisualizing the network of eigengenes

# Recalculate module eigengenes
MEs = moduleEigengenes(datExpr0, moduleColors)$eigengenes
# Isolate weight from the clinical traits
grup = as.data.frame(as.numeric(datTraits$Group_num ))
names(grup) = "grup"
# Add the weight to existing module eigengenes

MET = orderMEs(cbind(MEs, grup))
# Plot the relationships among the eigengenes and the trait
sizeGrWindow(5,7.5);
par(cex = 0.9)
plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
= 90)

The function produces a dendrogram of the eigengenes and trait(s), and a heatmap of their relationships. To split the dendrogram and heatmap plots, we can use the following code

# Plot the dendrogram

par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0),
plotHeatmaps = F)

# Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
plotDendrograms = FALSE, xLabelsAngle = 90)

6 Exporting to Cytoscape

TOM <-1-dissTOM

modules = moduls_int

probes = colnames(datExpr0)
inModule = is.finite(match(moduleColors, modules));

modProbes = probes[inModule];
# modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
# Select the corresponding Topological Overlap
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)

dimnames(modTOM) = list(modProbes, modProbes)
# Export the network into edge and node list files Cytoscape can read
cyt = exportNetworkToCytoscape(modTOM,
edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
weighted = TRUE,
threshold = 0.02,
nodeNames = modProbes,
# altNodeNames = modGenes,
nodeAttr = moduleColors[inModule])
library(igraph)

adj <- modTOM
adj[adj > 0.3] = 1
# dim(adj)
adj[adj != 1] = 0
network <- graph.adjacency(adj)
network <- simplify(network)  # removes self-loops
results <- net


V(network)$color <- moduleColors[inModule]
V(network)$size<-6
par(mar=c(0,0,0,0))
# remove unconnected nodes
network <- delete.vertices(network, degree(network)==0)
igraph::plot.igraph(network,
                    # mark.groups = T,
                    layout=layout.fruchterman.reingold(network), 
     edge.arrow.size = 0.1)